Redacts a PDF document stored in a file.
Public Shared Sub Redact(
ByVal fileName As String,
ByVal password As String,
ByVal redacts As IList(Of PDFRedact
))
fileName
Name of the file containing an existing PDF document to be redacted.
password
The password to use if fileName contains an encrypted PDF file.
redacts
One or more PDF redact objects.
Redaction can be used to remove sensitive information from an existing PDF document.
This method quickly redacts an existing PDF document in place by removing any character, image, or shape that intersects the PDFRedact.Bounds of any item in redacts. The resulting PDF is not regenerated and therefore keeps exactly the same compression, metadata, fonts, and other resources.
Use the following code to redact all data in a PDF page at location 0, 0 to 100, 100:
// Create a PDF redaction object
var redact = new PDFRedact(0, 0, 100, 100);
// Redact the file in place. The second argument is the password;
// pass null when the file is not encrypted.
PDFFile.Redact(pdfFileName, null, new List<PDFRedact> { redact });
This example parses the text of a PDF file, finds the locations of all items containing the word "LEADTOOLS", and redacts them.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Controls;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.Pdf;
using Leadtools.Svg;
using Leadtools.WinForms;
// Copies the sample 'leadtools.pdf', redacts every word in the copy that
// contains "LEADTOOLS" (case-insensitively), then re-parses the copy and
// asserts that no such word is left.
private static void RedactExample()
{
    const string toRedact = "LEADTOOLS";

    // Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since Redact modifies the file in place
    string sampleFolder = @"C:\Users\Public\Documents\LEADTOOLS Images";
    string targetPath = Path.Combine(sampleFolder, "leadtools-redacted.pdf");
    File.Copy(Path.Combine(sampleFolder, "leadtools.pdf"), targetPath, true);

    // Step 1: parse every page and gather all of its words together with their positions
    var documentWords = new List<MyPDFWord>();
    using (var parsedDocument = new PDFDocument(targetPath))
    {
        parsedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);
        foreach (PDFDocumentPage page in parsedDocument.Pages)
        {
            documentWords.AddRange(GetPageWords(page));
        }
    }

    // Step 2: build one PDFRedact per word whose text contains the search value
    string needle = toRedact.ToLowerInvariant();
    var redactions = new List<PDFRedact>();
    foreach (MyPDFWord candidate in documentWords)
    {
        if (!candidate.Value.ToLowerInvariant().Contains(needle))
        {
            continue;
        }

        Console.WriteLine($"Found {candidate.Value} at {candidate.Bounds} in page {candidate.PageNumber}");
        LeadRectD area = candidate.Bounds;
        redactions.Add(new PDFRedact
        {
            PageNumber = candidate.PageNumber,
            Bounds = new PDFRect(area.Left, area.Top, area.Right, area.Bottom)
        });
    }

    // Step 3: redact the document (null password: the file is treated as unencrypted)
    PDFFile.Redact(targetPath, null, redactions);

    // Step 4: re-parse the redacted PDF and check that none of its words contain the search value
    using (var redactedDocument = new PDFDocument(targetPath))
    {
        redactedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1);
        foreach (PDFDocumentPage page in redactedDocument.Pages)
        {
            foreach (MyPDFWord remaining in GetPageWords(page))
            {
                Debug.Assert(!remaining.Value.ToLowerInvariant().Contains(needle));
            }
        }
    }
}
// Describes one word found on a PDF page. Instances are created and filled in by GetPageWords.
class MyPDFWord
{
// Number of the page the word was found on (copied from PDFDocumentPage.PageNumber)
public int PageNumber;
// The text of the word, built from the character code of each text object in it
public string Value;
// Bounding rectangle of the word: the union of the bounds of its text objects,
// after GetPageWords has flipped the Y values against the page height
public LeadRectD Bounds;
}
// Groups the text objects of a parsed PDF page into words.
//
// pdfPage: a page whose Objects have already been parsed (see PDFDocument.ParsePages).
// Returns: one MyPDFWord per run of text objects, in document order. A run ends at an
// object flagged IsEndOfWord or IsEndOfLine, or at the end of the object list.
// The list is empty when the page has no objects.
private static IList<MyPDFWord> GetPageWords(PDFDocumentPage pdfPage)
{
    var words = new List<MyPDFWord>();
    IList<PDFObject> objects = pdfPage.Objects;
    if (objects == null || objects.Count == 0)
        return words;

    int objectIndex = 0;
    int objectCount = objects.Count;
    double pageHeight = pdfPage.Height;

    // Loop through all the objects
    while (objectIndex < objectCount)
    {
        // Accumulate the text and the total bounding rectangle of the next word.
        // Text is collected while scanning (rather than from a start index afterwards)
        // so that a non-text object in the middle or at the end of a run cannot
        // discard characters that were already seen and already counted in wordBounds.
        LeadRectD wordBounds = LeadRectD.Empty;
        var sb = new StringBuilder();
        bool hasText = false;

        // Loop till we reach EndOfWord/EndOfLine or reach the end of the objects
        bool more = true;
        while (more)
        {
            PDFObject pdfObject = objects[objectIndex];

            // Only text objects contribute to the word; anything else is skipped
            if (pdfObject.ObjectType == PDFObjectType.Text)
            {
                // Object bounds are in bottom-left coordinates, convert them to top-left
                LeadRectD objectBounds = LeadRectD.FromLTRB(
                    pdfObject.Bounds.Left,
                    pageHeight - pdfObject.Bounds.Top,
                    pdfObject.Bounds.Right,
                    pageHeight - pdfObject.Bounds.Bottom);

                // Add the bounding rectangle of this object
                if (wordBounds.IsEmpty)
                    wordBounds = objectBounds;
                else
                    wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds);

                sb.Append(pdfObject.Code);
                hasText = true;
            }

            objectIndex++;
            more = (objectIndex < objectCount) && !pdfObject.TextProperties.IsEndOfWord && !pdfObject.TextProperties.IsEndOfLine;
        }

        // The run contained no text objects at all: nothing to report
        if (!hasText)
            continue;

        // Add this word to the list
        var word = new MyPDFWord();
        word.PageNumber = pdfPage.PageNumber;
        word.Value = sb.ToString();
        word.Bounds = wordBounds;
        words.Add(word);
    }

    return words;
}
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Pdf
Imports Leadtools.WinForms
Imports Leadtools.Svg
Imports Leadtools.ImageProcessing
' Copies the sample 'leadtools.pdf', redacts every word in the copy that
' contains "LEADTOOLS" (case-insensitively), then re-parses the copy and
' asserts that no such word is left.
Private Shared Sub RedactExample()
    Const toRedact As String = "LEADTOOLS"

    ' Work on a copy of 'leadtools.pdf' installed with LEADTOOLS, since Redact modifies the file in place
    Dim sampleFolder As String = "C:\Users\Public\Documents\LEADTOOLS Images"
    Dim targetPath As String = Path.Combine(sampleFolder, "leadtools-redacted.pdf")
    File.Copy(Path.Combine(sampleFolder, "leadtools.pdf"), targetPath, True)

    ' Step 1: parse every page and gather all of its words together with their positions
    Dim documentWords As New List(Of MyPDFWord)()
    Using parsedDocument As New PDFDocument(targetPath)
        parsedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1)
        For Each page As PDFDocumentPage In parsedDocument.Pages
            documentWords.AddRange(GetPageWords(page))
        Next
    End Using

    ' Step 2: build one PDFRedact per word whose text contains the search value
    Dim needle As String = toRedact.ToLowerInvariant()
    Dim redactions As New List(Of PDFRedact)()
    For Each candidate As MyPDFWord In documentWords
        If Not candidate.Value.ToLowerInvariant().Contains(needle) Then
            Continue For
        End If

        Console.WriteLine($"Found {candidate.Value} at {candidate.Bounds} in page {candidate.PageNumber}")
        Dim area As LeadRectD = candidate.Bounds
        Dim redaction As New PDFRedact() With {
            .PageNumber = candidate.PageNumber,
            .Bounds = New PDFRect(area.Left, area.Top, area.Right, area.Bottom)
        }
        redactions.Add(redaction)
    Next

    ' Step 3: redact the document (Nothing as password: the file is treated as unencrypted)
    PDFFile.Redact(targetPath, Nothing, redactions)

    ' Step 4: re-parse the redacted PDF and check that none of its words contain the search value
    Using redactedDocument As New PDFDocument(targetPath)
        redactedDocument.ParsePages(PDFParsePagesOptions.Objects, 1, -1)
        For Each page As PDFDocumentPage In redactedDocument.Pages
            For Each remaining As MyPDFWord In GetPageWords(page)
                Debug.Assert(Not remaining.Value.ToLowerInvariant().Contains(needle))
            Next
        Next
    End Using
End Sub
' Describes one word found on a PDF page. Instances are created and filled in by GetPageWords.
Class MyPDFWord
' Number of the page the word was found on (copied from PDFDocumentPage.PageNumber)
Public PageNumber As Integer
' The text of the word, built from the character code of each text object in it
Public Value As String
' Bounding rectangle of the word: the union of the bounds of its text objects,
' after GetPageWords has flipped the Y values against the page height
Public Bounds As LeadRectD
End Class
' Groups the text objects of a parsed PDF page into words.
'
' pdfPage: a page whose Objects have already been parsed (see PDFDocument.ParsePages).
' Returns: one MyPDFWord per run of text objects, in document order. A run ends at an
' object flagged IsEndOfWord or IsEndOfLine, or at the end of the object list.
' The list is empty when the page has no objects.
Private Shared Function GetPageWords(pdfPage As PDFDocumentPage) As IList(Of MyPDFWord)
    Dim words As New List(Of MyPDFWord)()
    Dim objects As IList(Of PDFObject) = pdfPage.Objects
    If objects Is Nothing OrElse objects.Count = 0 Then
        Return words
    End If

    Dim objectIndex As Integer = 0
    Dim objectCount As Integer = objects.Count
    Dim pageHeight As Double = pdfPage.Height

    ' Loop through all the objects
    While objectIndex < objectCount
        ' Accumulate the text and the total bounding rectangle of the next word.
        ' Text is collected while scanning (rather than from a start index afterwards)
        ' so that a non-text object in the middle or at the end of a run cannot
        ' discard characters that were already seen and already counted in wordBounds.
        Dim wordBounds As LeadRectD = LeadRectD.Empty
        Dim sb As New StringBuilder()
        Dim hasText As Boolean = False

        ' Loop till we reach EndOfWord/EndOfLine or reach the end of the objects
        Dim more As Boolean = True
        While more
            Dim pdfObject As PDFObject = objects(objectIndex)

            ' Only text objects contribute to the word; anything else is skipped
            If pdfObject.ObjectType = PDFObjectType.Text Then
                ' Object bounds are in bottom-left coordinates, convert them to top-left
                Dim objectBounds As LeadRectD = LeadRectD.FromLTRB(
                    pdfObject.Bounds.Left,
                    pageHeight - pdfObject.Bounds.Top,
                    pdfObject.Bounds.Right,
                    pageHeight - pdfObject.Bounds.Bottom)

                ' Add the bounding rectangle of this object
                If wordBounds.IsEmpty Then
                    wordBounds = objectBounds
                Else
                    wordBounds = LeadRectD.UnionRects(wordBounds, objectBounds)
                End If

                sb.Append(pdfObject.Code)
                hasText = True
            End If

            objectIndex = objectIndex + 1
            more = (objectIndex < objectCount) AndAlso Not pdfObject.TextProperties.IsEndOfWord AndAlso Not pdfObject.TextProperties.IsEndOfLine
        End While

        ' The run contained no text objects at all: nothing to report
        If Not hasText Then
            Continue While
        End If

        ' Add this word to the list
        Dim word As New MyPDFWord()
        word.PageNumber = pdfPage.PageNumber
        word.Value = sb.ToString()
        word.Bounds = wordBounds
        words.Add(word)
    End While

    Return words
End Function
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document